import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import plotly.express as px
import plotly.graph_objects as go
import datetime
from scipy import stats
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides warnings raised by pandas and statsmodels - confirm that is intended.
warnings.filterwarnings('ignore')
# Directory prefix that data file names are concatenated onto when loading.
# This is a raw string, so each `\\` stays as two literal backslash characters.
path = r'D:\\NUS\\Seat_study1\\output_data\\'
# Second directory prefix; it is not referenced in the visible part of this file.
anthro_path = r'D:\\NUS\\Seat_study1\\anthro_data\\'
# Helper plotting functions
def scatter_ax(ax, x, y, xlabel, title):
    """Scatter ``y`` against ``x`` on ``ax`` and overlay the fitted OLS line.

    A simple linear model ``y = b0 + b1 * x`` is fitted with statsmodels. Its
    summary table is passed to ``display`` and the fitted line is drawn as a
    dashed black line over the observed range of ``x``.

    Parameters
    ----------
    ax : object providing ``scatter``, ``plot``, ``set_xlabel``,
        ``set_ylabel``, ``set_title`` and ``grid`` (e.g. a matplotlib Axes).
    x, y : array-like
        Predictor and response; both must be convertible to float.
    xlabel : str
        Label for the x axis.
    title : str
        Title for the axes.

    Returns
    -------
    None
        The function only draws on ``ax`` and displays the model summary.
    """
    # Convert once so the fit and the line range use the same numeric values.
    x_values = np.asarray(x, float)
    y_values = np.asarray(y, float)

    ax.scatter(x, y, alpha=0.6)

    # Fit the model a single time and reuse the result for both the summary
    # and the coefficients.
    design = sm.add_constant(x_values)
    fit = sm.OLS(y_values, design).fit()
    # NOTE(review): `display` is not imported in the visible file; presumably
    # the IPython/Jupyter builtin - confirm before running outside a notebook.
    display(fit.summary())

    # params is ordered (constant, slope) because add_constant prepends the
    # constant column.
    intercept, slope = fit.params
    line_x = np.linspace(x_values.min(), x_values.max(), 100)
    line_y = intercept + slope * line_x
    ax.plot(line_x, line_y, "--", color="black", linewidth=1.2, alpha=0.8)

    ax.set_xlabel(xlabel)
    ax.set_ylabel("Discomfort increase per hour (slope)")
    ax.set_title(title)
    ax.grid(True, alpha=0.3)
# Load the ratings table from the output-data directory.
df_ratings_all = pd.read_csv(path + 'df_ratings_all.csv')
# Scale slope_hour down by a factor of 10.
# NOTE(review): the unit change this represents is not shown here - confirm.
df_ratings_all['slope_hour'] = df_ratings_all['slope_hour'] / 10
# Preview the first two rows.
df_ratings_all.head(2)
| rating1 | rating2 | rating3 | rating4 | rating5 | rating6 | rating7 | rating8 | rating9 | rating10 | ... | intercept | r-square | rmse | Significant | ll | ul | slope_hour | slope_direction | max_rating | slope_group | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10.0 | 10.0 | 10.0 | 10.0 | 11.0 | 11.0 | 11.0 | 10.0 | 12.0 | 12.0 | ... | 8.752 | 0.75784 | 1.39106 | True | 0.0159 | 0.0281 | 0.132 | P | 20.0 | Below_3.1hrs |
| 1 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 10.0 | 12.0 | 10.0 | 12.0 | 13.0 | ... | 9.515 | 0.74473 | 0.97788 | True | 0.0107 | 0.0193 | 0.090 | P | 16.0 | Below_3.1hrs |
2 rows × 60 columns
# Summary statistics of the rescaled slope_hour column.
df_ratings_all['slope_hour'].describe()
count 208.000000 mean 0.177577 std 0.207520 min -0.372000 25% 0.024000 50% 0.102000 75% 0.282000 max 0.936000 Name: slope_hour, dtype: float64
# Check dtypes and non-null counts of the two columns used in the regression.
df_ratings_all.loc[:, ['slope_hour', 'Seat_angle']].info()
<class 'pandas.DataFrame'> RangeIndex: 208 entries, 0 to 207 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 slope_hour 208 non-null float64 1 Seat_angle 208 non-null str dtypes: float64(1), str(1) memory usage: 3.4 KB
# Number of missing Seat_angle entries.
df_ratings_all['Seat_angle'].isna().sum()
0
# Map textual placeholders for "missing" to NaN so that isna() counts them.
null_vals = ['', 'nan', 'N/A', None, 'na', 'None', 'none']
replaced_null = {item: np.nan for item in null_vals}
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: under pandas Copy-on-Write the in-place call modifies a
# temporary object and leaves df_ratings_all unchanged.
df_ratings_all['Seat_angle'] = df_ratings_all['Seat_angle'].replace(replaced_null)
# Re-count missing values after the normalisation.
df_ratings_all['Seat_angle'].isna().sum()
0
# Keep only rows whose Seat_angle text holds a single value, i.e. it is present
# and contains none of the separators '-', ' ' or ','.
# na=False keeps the mask strictly boolean when entries are missing.
seat_angle_text = df_ratings_all['Seat_angle']
is_single_value = seat_angle_text.notna() & ~seat_angle_text.str.contains(
    '[- ,]', regex=True, na=False
)
# .copy() makes temp an independent frame, so the dtype conversion below
# cannot write into (or warn about) a slice of df_ratings_all.
temp = df_ratings_all[is_single_value].copy()
temp['Seat_angle'].info()
temp['Seat_angle'] = temp['Seat_angle'].astype(float)

# Distribution of the single-valued seat angles.
plt.hist(temp['Seat_angle']);

fig = px.box(temp, y='Seat_angle', notched=True)
# Optional grouping that was left disabled: color='Weight_status'
fig.update_traces(
    marker_line_color="black",  # outline colour of the sample-point markers
    boxmean="sd",  # draw the mean together with the standard deviation
    selector=dict(type='box'),
)
# Derive the subject count from the data instead of hard-coding it.
fig.update_layout(
    width=300,
    height=500,
    yaxis_title=f"Seat angle for {temp['Seat_angle'].count()} subjects",
)
fig.show();
<class 'pandas.Series'> Index: 174 entries, 0 to 207 Series name: Seat_angle Non-Null Count Dtype -------------- ----- 174 non-null str dtypes: str(1) memory usage: 2.7 KB
# Scatter plot with OLS fit: slope_hour against Seat_angle for the rows in temp.
fig, axes = plt.subplots(figsize=(6, 4))
scatter_ax(
    ax=axes,
    x=temp["Seat_angle"],
    y=temp["slope_hour"],
    xlabel="Seat angle",
    title="Discomfort increase per hour vs Seat_angle",
)
######### Seat angle is not significant at the 5% level (p-value = 0.090); it is significant only at the 10% level
| Dep. Variable: | y | R-squared: | 0.017 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.011 |
| Method: | Least Squares | F-statistic: | 2.903 |
| Date: | Thu, 22 Jan 2026 | Prob (F-statistic): | 0.0902 |
| Time: | 13:40:22 | Log-Likelihood: | 26.322 |
| No. Observations: | 174 | AIC: | -48.64 |
| Df Residuals: | 172 | BIC: | -42.33 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0533 | 0.074 | 0.721 | 0.472 | -0.093 | 0.199 |
| x1 | 0.0050 | 0.003 | 1.704 | 0.090 | -0.001 | 0.011 |
| Omnibus: | 36.600 | Durbin-Watson: | 1.892 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 53.317 |
| Skew: | 1.183 | Prob(JB): | 2.64e-12 |
| Kurtosis: | 4.326 | Cond. No. | 117. |
######### Sample of seat angles when it was not fixed ##########
# Select the rows whose Seat_angle text contains '-', ' ' or ',' and show the first three values.
df_ratings_all.loc[df_ratings_all['Seat_angle'].str.contains('[- ,]'), 'Seat_angle'].head(3)
67 15-36 72 22-32 83 17-21 Name: Seat_angle, dtype: str
def mean_from_cell(cell):
    """Collapse a range- or list-valued seat-angle cell to its mean.

    Parameters
    ----------
    cell : object
        Usually a string such as ``"20"``, ``"15-36"`` or ``"22,32"``.

    Returns
    -------
    object
        * ``cell`` unchanged when it is not a string (e.g. NaN/None), when it
          contains no separator, or when it already parses as one number
          (e.g. ``"-5"``).
        * Otherwise the mean (float) of the numbers separated by ``-``, ``,``
          or whitespace.

    Raises
    ------
    ValueError
        If the cell contains separators but a token is not numeric, or no
        numeric token is present at all.
    """
    # Missing values and already-numeric cells have nothing to split.
    if not isinstance(cell, str):
        return cell

    stripped = cell.strip()
    if not re.search(r'[-,\s]', stripped):
        return cell

    # A single number with a leading minus sign is a value, not a range.
    try:
        float(stripped)
    except ValueError:
        pass
    else:
        return cell

    # Treat runs of '-', ',' and whitespace as one separator so that
    # "15 - 36" and "15 36" are handled like "15-36"; float() accepts decimals.
    tokens = [tok for tok in re.split(r'[-,\s]+', stripped) if tok]
    if not tokens:
        raise ValueError(f"no numeric value found in seat-angle cell {cell!r}")
    return np.mean([float(tok) for tok in tokens])
# Replace range/list-valued Seat_angle cells by their mean, then make the
# whole column numeric. mean_from_cell is passed directly; wrapping it in a
# lambda adds nothing.
df_ratings_all['Seat_angle'] = df_ratings_all['Seat_angle'].apply(mean_from_cell)
df_ratings_all['Seat_angle'] = df_ratings_all['Seat_angle'].astype(float)

# Distribution of seat angles over all rows.
plt.hist(df_ratings_all['Seat_angle']);

fig = px.box(df_ratings_all, y='Seat_angle', notched=True)
# Optional grouping that was left disabled: color='Weight_status'
fig.update_traces(
    marker_line_color="black",  # outline colour of the sample-point markers
    boxmean="sd",  # draw the mean together with the standard deviation
    selector=dict(type='box'),
)
# Derive the subject count from the data instead of hard-coding it.
fig.update_layout(
    width=300,
    height=500,
    yaxis_title=f"Seat angle for {df_ratings_all['Seat_angle'].count()} subjects",
)
fig.show();
# Scatter plot with OLS fit: slope_hour against Seat_angle for every row of df_ratings_all.
fig, axes = plt.subplots(figsize=(6, 4))
scatter_ax(
    ax=axes,
    x=df_ratings_all["Seat_angle"],
    y=df_ratings_all["slope_hour"],
    xlabel="Seat angle",
    title="Discomfort increase per hour vs Seat_angle",
)
######### Seat angle is significant (p-value = 0.027) at 5% but not at 1% significance level
| Dep. Variable: | y | R-squared: | 0.023 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.019 |
| Method: | Least Squares | F-statistic: | 4.944 |
| Date: | Thu, 22 Jan 2026 | Prob (F-statistic): | 0.0273 |
| Time: | 13:40:24 | Log-Likelihood: | 34.915 |
| No. Observations: | 208 | AIC: | -65.83 |
| Df Residuals: | 206 | BIC: | -59.15 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0321 | 0.067 | 0.480 | 0.632 | -0.100 | 0.164 |
| x1 | 0.0058 | 0.003 | 2.223 | 0.027 | 0.001 | 0.011 |
| Omnibus: | 38.370 | Durbin-Watson: | 1.945 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 54.137 |
| Skew: | 1.113 | Prob(JB): | 1.76e-12 |
| Kurtosis: | 4.137 | Cond. No. | 120. |